clear all

// SET MACROS
// Root folders: Input is read from when loading, Output is written to when saving
global Input  "Y:/limited/Michigan_CTE/funding_change/data_derived/imported"
global Output "Y:/limited/Michigan_CTE/funding_change/data_derived"

// LOAD DATA
// Memory was emptied by -clear all- above, so no -clear- option is needed
use "${Input}/math_reading_score.dta"

// RENAME VARIABLES
// The rest of the script refers to the identifier as "student"
rename (ric) (student)

// CLEAN VARIABLES

// Grade 8 math and reading test scores
// Within each student the rows are ordered by year; the score is taken from the
// last row only (_n == _N), and only when that row's validity flag equals 1.
// NOTE(review): if a student has several rows with the same year, which one ends
// up last is arbitrary -- confirm that student-year is unique in the input.
bys student (year): gen math_g8_ = mathstdss if mathvalid == 1 & _n == _N

// Reading uses readingstdss through 2014 and elastdss from 2015 on.
// Stata treats missing as larger than any number, so "year >= 2015" alone would
// also match a missing year (and missing years sort last, i.e. onto the selected
// row). The explicit !missing(year) keeps such rows from being read as ELA scores.
by student (year): gen read_g8_ = readingstdss if readingvalid == 1 & year <= 2014 & _n == _N
by student (year): replace read_g8_ = elastdss if elavalid == 1 & year >= 2015 & !missing(year) & _n == _N

// Copy the single selected score onto every row of the student
by student (year): gegen math_g8 = max(math_g8_)
by student (year): gegen read_g8 = max(read_g8_)

// Impute missing scores
// A missing score is replaced by 0 and flagged in the companion *_imp indicator
foreach var of varlist math_g8 read_g8 {
  gen byte `var'_imp = missing(`var')
  replace `var' = 0 if `var'_imp == 1
}

// Average math and reading score
// Both scores are non-missing at this point (imputed above) and constant within
// student, so the plain average of the two columns is the student-level mean.
// Written out explicitly instead of passing two variables to an egen-style mean().
gen double test_avg_g8 = (math_g8 + read_g8) / 2
gen test_avg_sq_g8 = test_avg_g8^2

// The average counts as imputed when either of its components was imputed
gen byte test_avg_g8_imp = (math_g8_imp == 1 | read_g8_imp == 1)

// KEEP VARIABLES
// One list, in final column order, drives both -keep- and -order-
local final_vars student math_g8 math_g8_imp read_g8 read_g8_imp test_avg_g8 test_avg_sq_g8 test_avg_g8_imp
keep `final_vars'

// ORDER VARIABLES
order `final_vars'

// DROP DUPLICATES
// Remove rows that are identical on every remaining variable
gduplicates drop

// FORMAT VARIABLES
// Display the ID as a whole number, up to 16 digits wide
format %16.0f student

// VARIABLE LABELS
label var student "Student ID"
label var math_g8 "Grade 8 math score"
label var math_g8_imp "Imputed Grade 8 math score"
label var read_g8 "Grade 8 reading score"
label var read_g8_imp "Imputed Grade 8 reading score"
label var test_avg_g8 "Average Grade 8 math and reading score"
label var test_avg_sq_g8 "Average Grade 8 math and reading score squared"
label var test_avg_g8_imp "Imputed average Grade 8 math and reading score"

// VALUE LABELS

// Define labels
// The 0/1 mapping is defined here so that the attachment below does not depend
// on the input file happening to carry a "yesno" label with the same coding.
label define yesno 0 "No" 1 "Yes", replace

// Label values
label values math_g8_imp read_g8_imp test_avg_g8_imp yesno

// SORT & SAVE
// Shrink storage types first, then write the file ordered by student ID
compress
sort student
save "${Output}/math_reading_score_g8.dta", replace
